package com.ld.shieldsb.common.core.util.sensitiveWord.impl;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.ld.shieldsb.common.core.util.SensitiveWordUtil;
import com.ld.shieldsb.common.core.util.StringUtils;
import com.ld.shieldsb.common.core.util.sensitiveWord.SensitiveWordHandler;
import com.ld.shieldsb.common.core.util.sensitiveWord.SensitiveWordResult;

import lombok.Data;

/**
 * Sensitive-word handler for plain text.
 *
 * <p>{@link #deal(String, String)} delegates detection and replacement to
 * {@link SensitiveWordUtil}. The private helpers additionally support matching words that
 * have been obfuscated with interleaved punctuation or symbols (for example
 * {@code a_b_c}): such characters are stripped before matching, and the hits are mapped
 * back onto the original string so that only the word characters are masked.
 */
public class TextSensitiveWordHandler implements SensitiveWordHandler {

	/** Mask character used when the caller passes a null or empty replacement. */
	private static final char DEFAULT_MASK_CHAR = '*';

	/** Key under which a trie node stores its end-of-word flag. */
	private static final String END_FLAG_KEY = "isEnd";

	/** Value of {@link #END_FLAG_KEY} that marks a node as the end of a word. */
	private static final String END_FLAG_VALUE = "1";

	/**
	 * Matches a character that is ignored while matching. Uses Unicode general categories:
	 * P = punctuation, Z = separators (spaces, line breaks), S = symbols (math, currency),
	 * M = marks, C = other (control, format, surrogate, unassigned). Letters (L) and
	 * numbers (N) are deliberately not listed, so they are kept.
	 * Compiled once because it is applied to every character of the input.
	 */
	private static final Pattern IGNORABLE_CHAR_PATTERN = Pattern.compile("[\\pP\\pZ\\pS\\pM\\pC]",
			Pattern.CASE_INSENSITIVE);

	/**
	 * Checks {@code value} for sensitive words using {@link SensitiveWordUtil}.
	 *
	 * @param value       text to inspect
	 * @param replaceChar replacement passed through to
	 *                    {@code SensitiveWordUtil.replaceSensitiveWord}
	 * @return a result holding the original content and the "contains" flag; the matched
	 *         words and the filtered content are only set when a sensitive word was found
	 */
	public SensitiveWordResult deal(String value, String replaceChar) {
		SensitiveWordResult result = new SensitiveWordResult();
		boolean contains = SensitiveWordUtil.contains(value);
		if (contains) {
			result.setSensitiveWordStr(SensitiveWordUtil.getSensitiveWordStr(value));
			result.setFilteredContent(SensitiveWordUtil.replaceSensitiveWord(value, replaceChar));
		}
		result.setOriginalContent(value);
		result.setContains(contains);
		return result;
	}

	/**
	 * Small manual demo: loads a one-word dictionary and filters a string in which the word
	 * is obfuscated with underscores. The result is printed to standard error.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		Set<String> sensitiveWordSet = new HashSet<>();
		sensitiveWordSet.add("一党专制");
		// Initialise the sensitive-word dictionary.
		SensitiveWordUtil.init(sensitiveWordSet);
		String str = "一_党_专_制_的国家";
		SensitiveWordResult result = TextSensitiveWordHandler.filter(filterPunctuation(str), "*");
		System.err.println(result);
	}

	/**
	 * Finds sensitive words in the punctuation-stripped text and masks the corresponding
	 * characters in the original text.
	 *
	 * <p>For every start index the trie in {@code SensitiveWordUtil.sensitiveWordMap} is
	 * walked as far as possible and the longest word ending on a node flagged
	 * {@code isEnd = "1"} is taken. Ignored characters lying between the characters of a
	 * matched word are left untouched in the output.
	 *
	 * @param pohResult   stripped text plus the offset of each kept character in the original
	 * @param replacement mask; only its first character is used, and {@code '*'} is used
	 *                    when it is null or empty
	 * @return result with the original text, the masked text, the comma-separated matched
	 *         words and whether any word was found
	 */
	private static SensitiveWordResult filter(PunctuationOrHtmlFilteredResult pohResult, String replacement) {
		StringBuilder sentence = pohResult.getFilteredString();
		ArrayList<Integer> charOffsets = pohResult.getCharOffsets();
		StringBuilder masked = new StringBuilder(pohResult.getOriginalString());
		StringBuilder badWords = new StringBuilder();
		char maskChar = (replacement == null || replacement.isEmpty()) ? DEFAULT_MASK_CHAR : replacement.charAt(0);

		// NOTE(review): guards against the dictionary not having been initialised; confirm
		// whether SensitiveWordUtil can actually expose a null map.
		Map<?, ?> root = SensitiveWordUtil.sensitiveWordMap;
		int length = sentence.length();

		for (int i = 0; root != null && i < length; i++) {
			// Index of the last character of the longest word starting at i, or -1 if none.
			// A sentinel is required: comparing "end > start" would miss one-character words,
			// whose end index equals their start index.
			int matchEnd = -1;
			Map<?, ?> node = root;
			for (int j = i; j < length; j++) {
				Object child = node.get(sentence.charAt(j));
				if (!(child instanceof Map)) {
					// No continuation in the trie for this character.
					break;
				}
				node = (Map<?, ?>) child;
				if (END_FLAG_VALUE.equals(node.get(END_FLAG_KEY))) {
					// Keep walking: a longer word may share this prefix.
					matchEnd = j;
				}
			}

			if (matchEnd < 0) {
				continue;
			}
			for (int k = i; k <= matchEnd; k++) {
				// Translate the position in the stripped text back to the original text.
				masked.setCharAt(charOffsets.get(k), maskChar);
			}
			if (badWords.length() > 0) {
				badWords.append(",");
			}
			badWords.append(sentence.substring(i, matchEnd + 1));
			// Resume right after the matched word (the loop increment adds one).
			i = matchEnd;
		}

		String foundWords = badWords.toString();
		SensitiveWordResult result = new SensitiveWordResult();
		result.setOriginalContent(pohResult.getOriginalString());
		result.setFilteredContent(masked.toString());
		result.setSensitiveWordStr(foundWords);
		result.setContains(StringUtils.isNotEmpty(foundWords));
		return result;
	}

	/**
	 * Removes every ignorable character (see {@link #IGNORABLE_CHAR_PATTERN}) from
	 * {@code originalString} and records, for each kept character, its index in the
	 * original string.
	 *
	 * <p>NOTE(review): the text is processed one UTF-16 {@code char} at a time. A lone
	 * surrogate belongs to category Cs, so characters outside the Basic Multilingual Plane
	 * are always dropped here.
	 *
	 * @param originalString text to strip
	 * @return the original text, the stripped text and the offset table
	 */
	private static PunctuationOrHtmlFilteredResult filterPunctuation(String originalString) {
		StringBuilder filteredString = new StringBuilder();
		ArrayList<Integer> charOffsets = new ArrayList<Integer>();
		for (int i = 0; i < originalString.length(); i++) {
			char c = originalString.charAt(i);
			if (!isPunctuationChar(c)) {
				filteredString.append(c);
				charOffsets.add(i);
			}
		}
		PunctuationOrHtmlFilteredResult result = new PunctuationOrHtmlFilteredResult();
		result.setOriginalString(originalString);
		result.setFilteredString(filteredString);
		result.setCharOffsets(charOffsets);
		return result;
	}

	/**
	 * Tells whether {@code c} is ignored during matching, i.e. whether it is matched by
	 * {@link #IGNORABLE_CHAR_PATTERN}.
	 *
	 * @param c character to classify
	 * @return {@code true} if the character is punctuation, a separator, a symbol, a mark
	 *         or in the "other" category
	 */
	private static boolean isPunctuationChar(char c) {
		Matcher m = IGNORABLE_CHAR_PATTERN.matcher(String.valueOf(c));
		return m.find();
	}

	/**
	 * Output of the stripping step. Accessors are generated by Lombok's {@code @Data}.
	 */
	@Data
	private static class PunctuationOrHtmlFilteredResult {
		/** The text exactly as it was passed in. */
		private String originalString;
		/** The text with ignorable characters removed. */
		private StringBuilder filteredString;
		/** For each character of {@code filteredString}, its index in {@code originalString}. */
		private ArrayList<Integer> charOffsets;

	}

}
